** Data reading and variable selection from raw data
** Social Stratification in Eastern Europe After 1989: General Population Survey (1993) - Russia

** 01. Reading data **

// Start from a clean session: close a log left open by an earlier run
// (capture suppresses the error raised when none is open), clear memory,
// and switch off output paging.
capture log close
clear all
set more off

// Placeholders: fill in the working directory and the raw data file before running.
cd /*insert your work directory here*/
use /*read your data here*/


** 02. Constructing year and country variables **

// The survey year is a constant for this file.
generate year = 1993
label variable year "survey year"

// The raw country variable is replaced by the numeric ISO code.
// Russia: 643 (see "ISO Country Codes.pdf")
drop country
generate country = 643
label variable country "ISO country code"


** 03. ID variables **

// Copy the raw respondent id into the harmonised person id.
// Stored as double: generate defaults to float, which cannot represent every
// integer above 16,777,216 exactly, so long ids would be silently rounded and
// could collide.
generate double pid = respid1
label variable pid "person id"


** 04. Basic Demographics (Sex and Age/birth year) **

// Sex is used as delivered; tabulate it as a check.
tabulate sex

// Negative birth values are set to missing before anything is derived from them.
replace birth = . if birth < 0

// birth plus 1900 gives the four-digit birth year.
generate birthyr = birth + 1900
label variable birthyr "year of birth"

// Age is measured in the survey year.
generate age = year - birthyr
label variable age "age"


** 05. Siblings **

// Number of siblings, copied from the raw variable.
generate nsibs = sibling
// Every other derived variable carries a variable label; nsibs had none.
label variable nsibs "total number of siblings"

// Value labels for the negative (missing-data) codes
label define nsibs -8 "refused to answer" -7 "don't know" -2 "blank"
label values nsibs nsibs


** 06. Own education **

// Highest education in CASMIN code categories.
// Although the label says "highest completed education", the respondent did not
// necessarily finish the reported level. Recoding each incomplete level into the
// previous level gives an easy approximation.
generate educ = hiedrc
label variable educ "respondent's highest completed education"

label define educ ///
    -8 "refused to answer" ///
    -7 "don't know" ///
    -2 "missing" ///
    -1 "skipped (not applicable--not economically active, not alive, too young, etc.)" ///
     0 "none" ///
     1 "primary education-incomplete" ///
     2 "primary education-completed" ///
     3 "Completed primary education and basic vocational qualification" ///
     4 "Secondary, incomplete, no certificate" ///
     5 "Secondary, vocational qualification" ///
     6 "Secondary, academic certificate/e.g., matura" ///
     7 "Higher education, incomplete, no certificate/degree" ///
     8 "Higher education, tertiary certificate/degree" ///
     9 "Higher education, post-graduate study"
label values educ educ

// Number of years spent in school
generate educ_yr = hiedyrs
label variable educ_yr "respondent's number of years spent in school"
label define educ_yr -2 "missing"
label values educ_yr educ_yr


** 07. Parents' education: Father and/or Mother **

// Both parents reuse the value labels educ and educ_yr.
// As for the respondent, the reported level was not necessarily finished;
// recoding each incomplete level into the previous level gives an approximation.

// Father
generate faeduc = idadeduc
label variable faeduc "father's highest education"
label values faeduc educ

generate faeduc_yr = ydadeduc
label variable faeduc_yr "father's years of education"
label values faeduc_yr educ_yr

// Mother
generate moeduc = imomeduc
label variable moeduc "mother's education"
label values moeduc educ

generate moeduc_yr = ymomeduc
label variable moeduc_yr "mother's years of education"
label values moeduc_yr educ_yr


** 08. Own occupation **

// ISCO code: current job, most recent job, and the jobs held in January 1993 and 1988
generate occ_ISCO = occc
label variable occ_ISCO "ISCO code for the current occupation"
generate lastocc_ISCO = occf
label variable lastocc_ISCO "ISCO code for the most recent occupation"
foreach yy in 93 88 {
    generate occ`yy'_ISCO = occ`yy'
    label variable occ`yy'_ISCO "ISCO code for the occupation held in January 19`yy'"
}

// One shared set of value labels for the negative (missing-data) codes
label define occ_ISCO ///
    -8 "refused to answer" ///
    -7 "don't know" ///
    -3 "wild code" ///
    -2 "blank" ///
    -1 "skipped"
foreach v of varlist occ_ISCO lastocc_ISCO occ93_ISCO occ88_ISCO {
    label values `v' occ_ISCO
}

// ISEI code: same four reference points as the ISCO variables
generate occ_ISEI = isec
label variable occ_ISEI "ISEI code for the current occupation"
generate lastocc_ISEI = isef
label variable lastocc_ISEI "ISEI code for the most recent occupation"
foreach yy in 93 88 {
    generate occ`yy'_ISEI = ise`yy'
    label variable occ`yy'_ISEI "ISEI code for the occupation held in January 19`yy'"
}

// Value labels for the negative (missing-data) codes
label define occ_ISEI ///
    -8 "refused to answer" ///
    -7 "don't know" ///
    -3 "wild code" ///
    -2 "blank" ///
    -1 "skipped"
foreach v of varlist occ_ISEI lastocc_ISEI occ93_ISEI occ88_ISEI {
    label values `v' occ_ISEI
}

// EGP category
generate occ_EGP = egpc
label variable occ_EGP "EGP code for the current occupation"
generate lastocc_EGP = egpf
label variable lastocc_EGP "EGP code for the most recent occupation"
foreach yy in 93 88 {
    generate occ`yy'_EGP = egp`yy'
    label variable occ`yy'_EGP "EGP code for the occupation held in January 19`yy'"
}

// Value labels: missing-data codes followed by the EGP classes (no label is defined for code 6)
label define occ_EGP ///
    -8 "refused to answer" ///
    -7 "don't know" ///
    -3 "wild code" ///
    -2 "blank" ///
    -1 "skipped" ///
     1 "Class I. Higher-grade professionals, administrators, and officials; managers in large industrial establishments; large proprietors" ///
     2 "Class II. Lower-grade professionals, administrators, and officials; higher-grade technicians; managers in small industrial establishments; supervisors of non-manual employees" ///
     3 "Classes IIIa and IIIb. Routine non-manual employuees, higher-grade (administration and commerce)/Routine non-manual employees, lower grade (sales and services)" ///
     4 " Class IVa. Small proprietors, artisans, etc., with employees" ///
     5 "Class IVb. Small proprietors, artisans, etc., without employees" ///
     7 "Class V. Lower-grade technicians; supervisors of manual workers" ///
     8 "Class VI. Skilled manual workers" ///
     9 "Class VIIa. Semi- and unskilled manual workers/not in agriculture, etc" ///
    10 "Class VIIb. Agricultural and other workers in primary production" ///
    11 "Class IVc. Farmers and smallholders; other self-employed workers in primary production"

foreach v of varlist occ_EGP lastocc_EGP occ93_EGP occ88_EGP {
    label values `v' occ_EGP
}

// EGP six category
generate occ_SIX = sixc
label variable occ_SIX "SIX code for the current occupation"
generate lastocc_SIX = sixf
label variable lastocc_SIX "SIX code for the most recent occupation"
foreach yy in 93 88 {
    generate occ`yy'_SIX = six`yy'
    label variable occ`yy'_SIX "SIX code for the occupation held in January 19`yy'"
}

// Value labels: missing-data codes followed by the six classes
label define occ_SIX ///
    -8 "refused to answer" ///
    -7 "don't know" ///
    -3 "wild code" ///
    -2 "blank" ///
    -1 "skipped" ///
     1 "Professionals and aministrators" ///
     2 "Routine non-manual" ///
     3 "Small proprietors" ///
     4 "Skilled manual workers and manual supervisors" ///
     5 "Semi- and unskilled manual workers" ///
     6 "Farmers and farm laborers"

foreach v of varlist occ_SIX lastocc_SIX occ93_SIX occ88_SIX {
    label values `v' occ_SIX
}

// Industrial branch
// Current and most recent job get new names; ind93 and ind88 are only
// tabulated here and keep their raw names.
generate ind = indc
label variable ind "Industrial branch for the current occupation"
generate lastind = indf
label variable lastind "Industrial branch for the most recent occupation"
tabulate ind93
tabulate ind88

label define ind ///
    -8 "refused to answer" ///
    -7 "don't know" ///
    -3 "wild code" ///
    -2 "blank" ///
    -1 "skipped" ///
     1 "Agriculture, hunting, forestry, or fishing" ///
     2 "Mining and quarrying" ///
     3 "Manufacturing" ///
     4 "Construction" ///
     5 "Electricity, gas and water supply" ///
     6 "Transport, storage and communications" ///
     7 "Wholesale and retail trade" ///
     8 "Hotels and restaurants" ///
     9 "Education, science, culture, and mass communications (TV, radio, newspapers)" ///
    10 "Social services and health" ///
    11 "Financial services" ///
    12 "Other services" ///
    13 "Private households" ///
    14 "Public administration" ///
    15 "Armed forces, police" ///
    16 "Other industrial branches"

foreach v of varlist ind lastind {
    label values `v' ind
}

// Organisation type
// Current and most recent job get new names; org93 and org88 are only
// tabulated here and keep their raw names.
generate org = orgc
label variable org "Organization type code for the current occupation"
generate lastorg = orgf
label variable lastorg "Organization type code for the most recent occupation"
tabulate org93
tabulate org88

label define org ///
    -8 "refused to answer" ///
    -7 "don't know" ///
    -3 "wild code" ///
    -2 "blank" ///
    -1 "skipped" ///
     1 "State enterprise/authority" ///
     2 "Local (district, municipal) enterprise/authority" ///
     3 "Cooperative/employee-owned firm" ///
     4 "State-owned enterprise being prepared for privatization or currently in process of privatization" ///
     5 "Formerly state owned enterprise now privatized" ///
     6 "Enterprise never state owned, founded as private; no foreign ownership" ///
     7 "Joint venture (enterprise founded as private with partly foreign capital)" ///
     8 "Foreign owned firm" ///
     9 "Other"

foreach v of varlist org lastorg {
    label values `v' org
}

// Self-employment
generate selfemp = sec
label variable selfemp "Self-employed? Current occupation"
generate lastselfemp = sef
label variable lastselfemp "Self-employed? Most recent occupation"
foreach yy in 93 88 {
    generate selfemp`yy' = se`yy'
    label variable selfemp`yy' "Self-employed? Occupation held in January 19`yy'"
}

label define selfemp ///
    -8 "refused to answer" ///
    -7 "don't know" ///
    -3 "wild code" ///
    -2 "blank" ///
    -1 "skipped" ///
     1 "yes" ///
     2 "no"
foreach v of varlist selfemp lastselfemp selfemp93 selfemp88 {
    label values `v' selfemp
}

// Number of people supervised
generate numsp = subc
label variable numsp "Number supervised in the current occupation"
generate lastnumsp = subf
label variable lastnumsp "Number supervised in the most recent occupation"
foreach yy in 93 88 {
    generate numsp`yy' = sub`yy'
    label variable numsp`yy' "Number supervised in the occupation held in January 19`yy'"
}

label define numsp ///
    -8 "refused to answer" ///
    -7 "don't know" ///
    -3 "wild code" ///
    -2 "blank" ///
    -1 "skipped" ///
     1 "none" ///
     2 "1-9" ///
     3 "10 or more"
foreach v of varlist numsp lastnumsp numsp93 numsp88 {
    label values `v' numsp
}


** 09. Parents' occupation **

// Activity
// One value-label set, act, is shared by the father and mother variables.
label define act ///
    -8 "refused to answer" ///
    -7 "don't know" ///
    -3 "wild code" ///
    -2 "blank" ///
    -1 "skipped(not applicable--not economically active, not alive, too young, etc.)" ///
     1 "Working or doing anything at all to earn money or help feed yourself or your family (including tending a family plot, hawking goods on the street, changing money, etc.)" ///
     2 "Unemployed and looking for work" ///
     3 "Going to school, studying" ///
     4 "Keeping house" ///
     5 "On maternity leave" ///
     6 "Retired" ///
     7 "Unable to work" ///
     8 "In military" ///
     9 "In jail" ///
    10 "Doing forced labor" ///
    11 "In concentration camp" ///
    12 "Not working, not doing anything else" ///
    13 "Other activity"

// Father: when the respondent was 14, then the four calendar years
generate faact14 = dadact14
label variable faact14 "Father's activity when respondent age 14"
foreach t in 48 52 63 73 {
    generate faact`t' = dadact`t'
    label variable faact`t' "Father's activity in 19`t'"
}
foreach t in 14 48 52 63 73 {
    label values faact`t' act
}

// Mother: same reference points
generate moact14 = momact14
label variable moact14 "Mother's activity when respondent age 14"
foreach t in 48 52 63 73 {
    generate moact`t' = momact`t'
    label variable moact`t' "Mother's activity in 19`t'"
}
foreach t in 14 48 52 63 73 {
    label values moact`t' act
}

// ISCO code
// Parental occupation variables reuse the occ_ISCO value labels.

// Father
generate faocc14_ISCO = dadocc14
label variable faocc14_ISCO "Father's occupation (expanded ISCO code) when respondent 14"
foreach t in 48 52 63 73 {
    generate faocc`t'_ISCO = dadocc`t'
    label variable faocc`t'_ISCO "Father's occupation (expanded ISCO code) in 19`t'"
}
foreach t in 14 48 52 63 73 {
    label values faocc`t'_ISCO occ_ISCO
}

// Mother
generate moocc14_ISCO = momocc14
label variable moocc14_ISCO "Mother's occupation (expanded ISCO code) when respondent 14"
foreach t in 48 52 63 73 {
    generate moocc`t'_ISCO = momocc`t'
    label variable moocc`t'_ISCO "Mother's occupation (expanded ISCO code) in 19`t'"
}
foreach t in 14 48 52 63 73 {
    label values moocc`t'_ISCO occ_ISCO
}

// Industrial branch
// Parental industry variables reuse the ind value labels.

// Father
generate faind14 = dadind14
label variable faind14 "Father's industrial branch when respondent 14"
foreach t in 48 52 63 73 {
    generate faind`t' = dadind`t'
    label variable faind`t' "Father's industrial branch in 19`t'"
}
foreach t in 14 48 52 63 73 {
    label values faind`t' ind
}

// Mother
generate moind14 = momind14
label variable moind14 "Mother's industrial branch when respondent 14"
foreach t in 48 52 63 73 {
    generate moind`t' = momind`t'
    label variable moind`t' "Mother's industrial branch in 19`t'"
}
foreach t in 14 48 52 63 73 {
    label values moind`t' ind
}

// Self-employment
// Parental self-employment variables reuse the selfemp value labels.
// Each variable label is attached through the variable's full name. The earlier
// version labelled "faselfemp" and "moselfemp"; this script creates no variables
// with those names, so the commands only worked through Stata's variable-name
// abbreviation. That fails under "set varabbrev off" and becomes ambiguous as soon
// as a second variable with the same prefix exists.

// Father
generate faselfemp14 = dadse14
label variable faselfemp14 "Father self-employed when respondent 14?"
foreach t in 48 52 63 73 {
    generate faselfemp`t' = dadse`t'
    label variable faselfemp`t' "Father self-employed in 19`t'?"
}
label values faselfemp14 faselfemp48 faselfemp52 faselfemp63 faselfemp73 selfemp

// Mother
generate moselfemp14 = momse14
label variable moselfemp14 "Mother self-employed when respondent 14?"
foreach t in 48 52 63 73 {
    generate moselfemp`t' = momse`t'
    label variable moselfemp`t' "Mother self-employed in 19`t'?"
}
label values moselfemp14 moselfemp48 moselfemp52 moselfemp63 moselfemp73 selfemp

// Number of people supervised
// Parental supervision variables reuse the numsp value labels.

// Father
generate fanumsp14 = dadsub14
label variable fanumsp14 "Number of persons supervised by father when respondent 14"
foreach t in 48 52 63 73 {
    generate fanumsp`t' = dadsub`t'
    label variable fanumsp`t' "Number of persons supervised by father in 19`t'"
}
foreach t in 14 48 52 63 73 {
    label values fanumsp`t' numsp
}

// Mother
generate monumsp14 = momsub14
label variable monumsp14 "Number of persons supervised by mother when respondent 14"
foreach t in 48 52 63 73 {
    generate monumsp`t' = momsub`t'
    label variable monumsp`t' "Number of persons supervised by mother in 19`t'"
}
foreach t in 14 48 52 63 73 {
    label values monumsp`t' numsp
}


** 10. Tabulate the Identified Variables **

// Writes descriptive checks of the harmonised variables to a log file.
// The log is opened as plain text (text) and overwrites an existing file of the
// same name (replace). The placeholder below must be replaced by the log path.
// Everything between "log using" and "log close" is echoed into that log, so the
// commands and comments in between are intentionally left exactly as written.
// Only the age-14 versions of the parental occupation variables are tabulated.
log using /*insert you work directory here*/, replace text

** Data reading and variable selection from raw data
** Social Stratification in Eastern Europe After 1989: Gerneral Population Survey (1993)- Russia

** Sex **
tab sex

** Age, Birth Year **
sum age birthyr, d

** Siblings **
sum nsibs, d

** R's Own Education **
tab1 educ

** Parental Education **
tab1 faeduc moeduc 

** R's Own Occupation **
tab1 occ_ISCO occ_ISEI occ_EGP occ_SIX ind org selfemp numsp

** Parental Occupation **
tab1 faact14 moact14 faocc14_ISCO moocc14_ISCO faind14 moind14 faselfemp14 moselfemp14 fanumsp14 monumsp14

log close

** 11. Keep the identified variables only

// The list of variables to retain is assembled piece by piece in a local macro.

// Identifiers, demographics, siblings and education
local keepvars year country pid sex age birthyr
local keepvars `keepvars' nsibs educ educ_yr faeduc faeduc_yr moeduc moeduc_yr

// Respondent's occupation codes: current, most recent, January 1993, January 1988
foreach scheme in ISCO ISEI EGP SIX {
    local keepvars `keepvars' occ_`scheme' lastocc_`scheme' occ93_`scheme' occ88_`scheme'
}

// Industry and organisation type (the 93/88 versions keep their raw names)
local keepvars `keepvars' ind lastind ind93 ind88 org lastorg org93 org88

// Self-employment and number supervised
local keepvars `keepvars' selfemp lastselfemp selfemp93 selfemp88
local keepvars `keepvars' numsp lastnumsp numsp93 numsp88

// Parental variables: father (fa) and mother (mo) at each reference point
foreach who in fa mo {
    foreach t in 14 48 52 63 73 {
        local keepvars `keepvars' `who'act`t' `who'occ`t'_ISCO `who'ind`t' `who'selfemp`t' `who'numsp`t'
    }
}

keep `keepvars'


** 12. Save the Data File **

// Placeholder: supply the output file name before running.
saveold /*insert your work directory here*/, replace



** 13. Homogenising education (by Manting Chen, 2017/4/3)**
** Own Education **
rename educ educ_cat
rename educ_yr educ_yrs

// Code missing values. Negative values are missing-data codes, never real durations.
// Only -2 was cleared before; the parental years variables are cleared of
// -8, -7, -2 and -1, so every negative code is now set to missing here as well.
replace educ_yrs = . if educ_yrs < 0

// Map the education categories to ISCED 2011 codes.
// Categories 0 (none) and 9 (post-graduate study) are defined in the educ value
// label and mapped for both parents, but had no rule here, leaving such
// respondents with a missing ISCED code. They now follow the parental mapping.
// NOTE(review): categories 4-6 use more detailed codes here (244/351/344) than the
// parental mapping (100/350/340) - confirm that this difference is intended.
generate educ_ISCED = 000 if educ_cat == 0
replace educ_ISCED = 020 if educ_cat == 1
replace educ_ISCED = 100 if educ_cat == 2
replace educ_ISCED = 100 if educ_cat == 3
replace educ_ISCED = 244 if educ_cat == 4
replace educ_ISCED = 351 if educ_cat == 5
replace educ_ISCED = 344 if educ_cat == 6
replace educ_ISCED = 300 if educ_cat == 7
replace educ_ISCED = 500 if educ_cat == 8
replace educ_ISCED = 700 if educ_cat == 9
label variable educ_ISCED "respondent highest education in ISCED 2011 code"

// Negative (missing-data) categories match none of the rules above, so
// educ_ISCED stays missing for them; no explicit recode is needed.

** Parents Education **
// Flag set to 1: father's education is actually father's
generate faeduc_flag = 1

// Switch to the harmonised names; the mother variables change prefix from mo to ma.
rename faeduc    faeduc_cat
rename faeduc_yr faeduc_yrs
rename moeduc    maeduc_cat
rename moeduc_yr maeduc_yrs

// Set the missing-data codes of the years variables to missing.
foreach v of varlist faeduc_yrs maeduc_yrs {
    foreach code in -2 -8 -7 -1 {
        replace `v' = . if `v' == `code'
    }
}

// ISCED 2011 codes; the same mapping is applied to father (fa) and mother (ma).
foreach who in fa ma {
    generate `who'educ_ISCED = 000 if `who'educ_cat == 0
    replace  `who'educ_ISCED = 020 if `who'educ_cat == 1
    replace  `who'educ_ISCED = 100 if `who'educ_cat == 2
    replace  `who'educ_ISCED = 100 if `who'educ_cat == 3
    replace  `who'educ_ISCED = 100 if `who'educ_cat == 4
    replace  `who'educ_ISCED = 350 if `who'educ_cat == 5
    replace  `who'educ_ISCED = 340 if `who'educ_cat == 6
    replace  `who'educ_ISCED = 300 if `who'educ_cat == 7
    replace  `who'educ_ISCED = 500 if `who'educ_cat == 8
    replace  `who'educ_ISCED = 700 if `who'educ_cat == 9
}
label variable faeduc_ISCED "father highest education in ISCED 2011 code"
label variable maeduc_ISCED "mother highest education in ISCED 2011 code"


** 14. Homogenising sibling **

// Cutoff flag: the value 99 is labelled "no cutoff".
generate nsibs_flag = 99
label variable nsibs_flag "cutoff of total number of siblings"
label define nsibs_flag 99 "no cutoff"
label values nsibs_flag nsibs_flag

// Negative codes are missing-data codes: set them to missing.
replace nsibs = . if nsibs < 0

// Number of brothers/sisters not available


** 15. Tab Education and Sibling Variables **

// One-way tables for each group of harmonised variables
local demog_vars sex age birthyr
local educ_vars  educ_cat educ_yrs faeduc_cat faeduc_yrs maeduc_cat maeduc_yrs faeduc_flag
local sib_vars   nsibs nsibs_flag

tab1 `demog_vars'
tab1 `educ_vars'
tab1 `sib_vars'


** 16. Save the Data File **

// Placeholder: supply the output file name before running.
saveold /*insert your work directory here*/, replace
